from gg.ast import *
from gg.lib.graph import Graph
from gg.lib.wl import Worklist
from gg.ast.params import GraphParam
import cgen
G = Graph("graph")
WL = Worklist()
ast = Module([
CBlock([cgen.Include("pagerank_push_cuda.cuh", system = False)], parse = False),
Kernel("ResetGraph", [G.param(), ('unsigned int', '__begin'), ('unsigned int', '__end'), ('float *', 'p_delta'), ('uint32_t *', 'p_nout'), ('float *', 'p_residual'), ('float *', 'p_value')],
[
ForAll("src", G.nodes("__begin", "__end"),
[
CDecl([("bool", "pop", " = src < __end")]),
If("pop", [
CBlock(["p_value[src]     = 0"]),
CBlock(["p_nout[src]      = 0"]),
CBlock(["p_residual[src]  = 0"]),
CBlock(["p_delta[src]     = 0"]),
]),
]),
]),
Kernel("InitializeGraph", [G.param(), ('unsigned int', '__begin'), ('unsigned int', '__end'), ('const float ', 'local_alpha'), ('uint32_t *', 'p_nout'), ('float *', 'p_residual'), ('DynamicBitset&', 'bitset_nout')],
[
CDecl([("uint32_t", "num_edges", "")]),
ForAll("src", G.nodes("__begin", "__end"),
[
CDecl([("bool", "pop", " = src < __end")]),
If("pop", [
CBlock(["p_residual[src]  = local_alpha"]),
CBlock(["num_edges = graph.getOutDegree(src)"]),
CBlock(["atomicTestAdd(&p_nout[src], num_edges)"]),
CBlock(["bitset_nout.set(src)"]),
]),
]),
]),
Kernel("PageRank_delta", [G.param(), ('unsigned int', '__begin'), ('unsigned int', '__end'), ('const float ', 'local_alpha'), ('float', 'local_tolerance'), ('float *', 'p_delta'), ('uint32_t *', 'p_nout'), ('float *', 'p_residual'), ('float *', 'p_value')],
[
CDecl([("float", "residual_old", "")]),
ForAll("src", G.nodes("__begin", "__end"),
[
CDecl([("bool", "pop", " = src < __end")]),
If("pop", [
If("p_residual[src] > 0",
[
CBlock(["residual_old = p_residual[src]"]),
CBlock(["p_residual[src]     = 0"]),
CBlock(["p_value[src] += residual_old"]),
If("residual_old > local_tolerance",
[
If("p_nout[src] > 0",
[
CBlock(["p_delta[src] = residual_old * (1 - local_alpha) / p_nout[src]"]),
]),
]),
]),
]),
]),
]),
Kernel("PageRank", [G.param(), ('unsigned int', '__begin'), ('unsigned int', '__end'), ('float *', 'p_delta'), ('float *', 'p_residual'), ('DynamicBitset&', 'bitset_residual'), ('HGAccumulator<unsigned int>', 'active_vertices')],
[
CDecl([("float", "_delta", "")]),
CDecl([("__shared__ cub::BlockReduce<unsigned int, TB_SIZE>::TempStorage", "active_vertices_ts", "")]),
CBlock(["active_vertices.thread_entry()"]),
ForAll("src", G.nodes("__begin", "__end"),
[
CDecl([("bool", "pop", " = src < __end")]),
If("pop", [
If("p_delta[src] > 0",
[
CBlock(["_delta = p_delta[src]"]),
CBlock(["p_delta[src]  = 0"]),
CBlock(["active_vertices.reduce( 1)"]),
], [ CBlock(["pop = false"]), ]),
]),
UniformConditional(If("!pop", [CBlock("continue")]), uniform_only = False, _only_if_np = True),
ClosureHint(
ForAll("nbr", G.edges("src"),
[
CDecl([("index_type", "dst", "")]),
CBlock(["dst = graph.getAbsDestination(nbr)"]),
CBlock(["atomicTestAdd(&p_residual[dst], _delta)"]),
CBlock(["bitset_residual.set(dst)"]),
]),
),
]),
CBlock(["active_vertices.thread_exit<cub::BlockReduce<unsigned int, TB_SIZE> >(active_vertices_ts)"], parse = False),
]),
Kernel("PageRankSanity", [G.param(), ('unsigned int', '__begin'), ('unsigned int', '__end'), ('float', 'local_tolerance'), ('float *', 'p_residual'), ('float *', 'p_value'), ('HGAccumulator<uint64_t>', 'DGAccumulator_residual_over_tolerance'), ('HGAccumulator<float>', 'DGAccumulator_sum'), ('HGAccumulator<float>', 'DGAccumulator_sum_residual'), ('HGReduceMax<float>', 'max_residual'), ('HGReduceMax<float>', 'max_value'), ('HGReduceMin<float>', 'min_residual'), ('HGReduceMin<float>', 'min_value')],
[
CDecl([("__shared__ cub::BlockReduce<uint64_t, TB_SIZE>::TempStorage", "DGAccumulator_residual_over_tolerance_ts", "")]),
CBlock(["DGAccumulator_residual_over_tolerance.thread_entry()"]),
CDecl([("__shared__ cub::BlockReduce<float, TB_SIZE>::TempStorage", "DGAccumulator_sum_ts", "")]),
CBlock(["DGAccumulator_sum.thread_entry()"]),
CDecl([("__shared__ cub::BlockReduce<float, TB_SIZE>::TempStorage", "DGAccumulator_sum_residual_ts", "")]),
CBlock(["DGAccumulator_sum_residual.thread_entry()"]),
CDecl([("__shared__ cub::BlockReduce<float, TB_SIZE>::TempStorage", "max_residual_ts", "")]),
CBlock(["max_residual.thread_entry()"]),
CDecl([("__shared__ cub::BlockReduce<float, TB_SIZE>::TempStorage", "max_value_ts", "")]),
CBlock(["max_value.thread_entry()"]),
CDecl([("__shared__ cub::BlockReduce<float, TB_SIZE>::TempStorage", "min_residual_ts", "")]),
CBlock(["min_residual.thread_entry()"]),
CDecl([("__shared__ cub::BlockReduce<float, TB_SIZE>::TempStorage", "min_value_ts", "")]),
CBlock(["min_value.thread_entry()"]),
ForAll("src", G.nodes("__begin", "__end"),
[
CDecl([("bool", "pop", " = src < __end")]),
If("pop", [
CBlock(["max_value.reduce(p_value[src])"]),
CBlock(["min_value.reduce(p_value[src])"]),
CBlock(["max_residual.reduce(p_residual[src])"]),
CBlock(["min_residual.reduce(p_residual[src])"]),
CBlock(["DGAccumulator_sum.reduce( p_value[src])"]),
CBlock(["DGAccumulator_sum.reduce( p_residual[src])"]),
If("p_residual[src] > local_tolerance",
[
CBlock(["DGAccumulator_residual_over_tolerance.reduce( 1)"]),
]),
]),
]),
CBlock(["DGAccumulator_residual_over_tolerance.thread_exit<cub::BlockReduce<uint64_t, TB_SIZE> >(DGAccumulator_residual_over_tolerance_ts)"], parse = False),
CBlock(["DGAccumulator_sum.thread_exit<cub::BlockReduce<float, TB_SIZE> >(DGAccumulator_sum_ts)"], parse = False),
CBlock(["DGAccumulator_sum_residual.thread_exit<cub::BlockReduce<float, TB_SIZE> >(DGAccumulator_sum_residual_ts)"], parse = False),
CBlock(["max_residual.thread_exit<cub::BlockReduce<float, TB_SIZE> >(max_residual_ts)"], parse = False),
CBlock(["max_value.thread_exit<cub::BlockReduce<float, TB_SIZE> >(max_value_ts)"], parse = False),
CBlock(["min_residual.thread_exit<cub::BlockReduce<float, TB_SIZE> >(min_residual_ts)"], parse = False),
CBlock(["min_value.thread_exit<cub::BlockReduce<float, TB_SIZE> >(min_value_ts)"], parse = False),
]),
Kernel("ResetGraph_cuda", [('unsigned int ', '__begin'), ('unsigned int ', '__end'), ('struct CUDA_Context* ', 'ctx')],
[
CDecl([("dim3", "blocks", "")]),
CDecl([("dim3", "threads", "")]),
CBlock(["kernel_sizing(blocks, threads)"]),
Invoke("ResetGraph", ("ctx->gg", "__begin", "__end", "ctx->delta.data.gpu_wr_ptr()", "ctx->nout.data.gpu_wr_ptr()", "ctx->residual.data.gpu_wr_ptr()", "ctx->value.data.gpu_wr_ptr()")),
CBlock(["check_cuda_kernel"], parse = False),
], host = True),
Kernel("ResetGraph_allNodes_cuda", [('struct CUDA_Context* ', 'ctx')],
[
CBlock(["ResetGraph_cuda(0, ctx->gg.nnodes, ctx)"]),
], host = True),
Kernel("ResetGraph_masterNodes_cuda", [('struct CUDA_Context* ', 'ctx')],
[
CBlock(["ResetGraph_cuda(ctx->beginMaster, ctx->beginMaster + ctx->numOwned, ctx)"]),
], host = True),
Kernel("ResetGraph_nodesWithEdges_cuda", [('struct CUDA_Context* ', 'ctx')],
[
CBlock(["ResetGraph_cuda(0, ctx->numNodesWithEdges, ctx)"]),
], host = True),
Kernel("InitializeGraph_cuda", [('unsigned int ', '__begin'), ('unsigned int ', '__end'), ('const float &', 'local_alpha'), ('struct CUDA_Context* ', 'ctx')],
[
CDecl([("dim3", "blocks", "")]),
CDecl([("dim3", "threads", "")]),
CBlock(["kernel_sizing(blocks, threads)"]),
Invoke("InitializeGraph", ("ctx->gg", "__begin", "__end", "local_alpha", "ctx->nout.data.gpu_wr_ptr()", "ctx->residual.data.gpu_wr_ptr()", "*(ctx->nout.is_updated.gpu_rd_ptr())")),
CBlock(["check_cuda_kernel"], parse = False),
], host = True),
Kernel("InitializeGraph_allNodes_cuda", [('const float &', 'local_alpha'), ('struct CUDA_Context* ', 'ctx')],
[
CBlock(["InitializeGraph_cuda(0, ctx->gg.nnodes, local_alpha, ctx)"]),
], host = True),
Kernel("InitializeGraph_masterNodes_cuda", [('const float &', 'local_alpha'), ('struct CUDA_Context* ', 'ctx')],
[
CBlock(["InitializeGraph_cuda(ctx->beginMaster, ctx->beginMaster + ctx->numOwned, local_alpha, ctx)"]),
], host = True),
Kernel("InitializeGraph_nodesWithEdges_cuda", [('const float &', 'local_alpha'), ('struct CUDA_Context* ', 'ctx')],
[
CBlock(["InitializeGraph_cuda(0, ctx->numNodesWithEdges, local_alpha, ctx)"]),
], host = True),
Kernel("PageRank_delta_cuda", [('unsigned int ', '__begin'), ('unsigned int ', '__end'), ('const float &', 'local_alpha'), ('float', 'local_tolerance'), ('struct CUDA_Context* ', 'ctx')],
[
CDecl([("dim3", "blocks", "")]),
CDecl([("dim3", "threads", "")]),
CBlock(["kernel_sizing(blocks, threads)"]),
Invoke("PageRank_delta", ("ctx->gg", "__begin", "__end", "local_alpha", "local_tolerance", "ctx->delta.data.gpu_wr_ptr()", "ctx->nout.data.gpu_wr_ptr()", "ctx->residual.data.gpu_wr_ptr()", "ctx->value.data.gpu_wr_ptr()")),
CBlock(["check_cuda_kernel"], parse = False),
], host = True),
Kernel("PageRank_delta_allNodes_cuda", [('const float &', 'local_alpha'), ('float', 'local_tolerance'), ('struct CUDA_Context* ', 'ctx')],
[
CBlock(["PageRank_delta_cuda(0, ctx->gg.nnodes, local_alpha, local_tolerance, ctx)"]),
], host = True),
Kernel("PageRank_delta_masterNodes_cuda", [('const float &', 'local_alpha'), ('float', 'local_tolerance'), ('struct CUDA_Context* ', 'ctx')],
[
CBlock(["PageRank_delta_cuda(ctx->beginMaster, ctx->beginMaster + ctx->numOwned, local_alpha, local_tolerance, ctx)"]),
], host = True),
Kernel("PageRank_delta_nodesWithEdges_cuda", [('const float &', 'local_alpha'), ('float', 'local_tolerance'), ('struct CUDA_Context* ', 'ctx')],
[
CBlock(["PageRank_delta_cuda(0, ctx->numNodesWithEdges, local_alpha, local_tolerance, ctx)"]),
], host = True),
Kernel("PageRank_cuda", [('unsigned int ', '__begin'), ('unsigned int ', '__end'), ('unsigned int &', 'active_vertices'), ('struct CUDA_Context* ', 'ctx')],
[
CDecl([("dim3", "blocks", "")]),
CDecl([("dim3", "threads", "")]),
CBlock(["kernel_sizing(blocks, threads)"]),
CDecl([("Shared<unsigned int>", "active_verticesval", " = Shared<unsigned int>(1)")]),
CDecl([("HGAccumulator<unsigned int>", "_active_vertices", "")]),
CBlock(["*(active_verticesval.cpu_wr_ptr()) = 0"]),
CBlock(["_active_vertices.rv = active_verticesval.gpu_wr_ptr()"]),
Invoke("PageRank", ("ctx->gg", "__begin", "__end", "ctx->delta.data.gpu_wr_ptr()", "ctx->residual.data.gpu_wr_ptr()", "*(ctx->residual.is_updated.gpu_rd_ptr())", "_active_vertices")),
CBlock(["check_cuda_kernel"], parse = False),
CBlock(["active_vertices = *(active_verticesval.cpu_rd_ptr())"]),
], host = True),
Kernel("PageRank_allNodes_cuda", [('unsigned int &', 'active_vertices'), ('struct CUDA_Context* ', 'ctx')],
[
CBlock(["PageRank_cuda(0, ctx->gg.nnodes, active_vertices, ctx)"]),
], host = True),
Kernel("PageRank_masterNodes_cuda", [('unsigned int &', 'active_vertices'), ('struct CUDA_Context* ', 'ctx')],
[
CBlock(["PageRank_cuda(ctx->beginMaster, ctx->beginMaster + ctx->numOwned, active_vertices, ctx)"]),
], host = True),
Kernel("PageRank_nodesWithEdges_cuda", [('unsigned int &', 'active_vertices'), ('struct CUDA_Context* ', 'ctx')],
[
CBlock(["PageRank_cuda(0, ctx->numNodesWithEdges, active_vertices, ctx)"]),
], host = True),
Kernel("PageRankSanity_cuda", [('unsigned int ', '__begin'), ('unsigned int ', '__end'), ('uint64_t &', 'DGAccumulator_residual_over_tolerance'), ('float &', 'DGAccumulator_sum'), ('float &', 'DGAccumulator_sum_residual'), ('float &', 'max_residual'), ('float &', 'max_value'), ('float &', 'min_residual'), ('float &', 'min_value'), ('float', 'local_tolerance'), ('struct CUDA_Context* ', 'ctx')],
[
CDecl([("dim3", "blocks", "")]),
CDecl([("dim3", "threads", "")]),
CBlock(["kernel_sizing(blocks, threads)"]),
CDecl([("Shared<uint64_t>", "DGAccumulator_residual_over_toleranceval", " = Shared<uint64_t>(1)")]),
CDecl([("HGAccumulator<uint64_t>", "_DGAccumulator_residual_over_tolerance", "")]),
CBlock(["*(DGAccumulator_residual_over_toleranceval.cpu_wr_ptr()) = 0"]),
CBlock(["_DGAccumulator_residual_over_tolerance.rv = DGAccumulator_residual_over_toleranceval.gpu_wr_ptr()"]),
CDecl([("Shared<float>", "DGAccumulator_sumval", " = Shared<float>(1)")]),
CDecl([("HGAccumulator<float>", "_DGAccumulator_sum", "")]),
CBlock(["*(DGAccumulator_sumval.cpu_wr_ptr()) = 0"]),
CBlock(["_DGAccumulator_sum.rv = DGAccumulator_sumval.gpu_wr_ptr()"]),
CDecl([("Shared<float>", "DGAccumulator_sum_residualval", " = Shared<float>(1)")]),
CDecl([("HGAccumulator<float>", "_DGAccumulator_sum_residual", "")]),
CBlock(["*(DGAccumulator_sum_residualval.cpu_wr_ptr()) = 0"]),
CBlock(["_DGAccumulator_sum_residual.rv = DGAccumulator_sum_residualval.gpu_wr_ptr()"]),
CDecl([("Shared<float>", "max_residualval", " = Shared<float>(1)")]),
CDecl([("HGReduceMax<float>", "_max_residual", "")]),
CBlock(["*(max_residualval.cpu_wr_ptr()) = 0"]),
CBlock(["_max_residual.rv = max_residualval.gpu_wr_ptr()"]),
CDecl([("Shared<float>", "max_valueval", " = Shared<float>(1)")]),
CDecl([("HGReduceMax<float>", "_max_value", "")]),
CBlock(["*(max_valueval.cpu_wr_ptr()) = 0"]),
CBlock(["_max_value.rv = max_valueval.gpu_wr_ptr()"]),
CDecl([("Shared<float>", "min_residualval", " = Shared<float>(1)")]),
CDecl([("HGReduceMin<float>", "_min_residual", "")]),
CBlock(["*(min_residualval.cpu_wr_ptr()) = 1073741823"]),
CBlock(["_min_residual.rv = min_residualval.gpu_wr_ptr()"]),
CDecl([("Shared<float>", "min_valueval", " = Shared<float>(1)")]),
CDecl([("HGReduceMin<float>", "_min_value", "")]),
CBlock(["*(min_valueval.cpu_wr_ptr()) = 1073741823"]),
CBlock(["_min_value.rv = min_valueval.gpu_wr_ptr()"]),
Invoke("PageRankSanity", ("ctx->gg", "__begin", "__end", "local_tolerance", "ctx->residual.data.gpu_wr_ptr()", "ctx->value.data.gpu_wr_ptr()", "_DGAccumulator_residual_over_tolerance", "_DGAccumulator_sum", "_DGAccumulator_sum_residual", "_max_residual", "_max_value", "_min_residual", "_min_value")),
CBlock(["check_cuda_kernel"], parse = False),
CBlock(["DGAccumulator_residual_over_tolerance = *(DGAccumulator_residual_over_toleranceval.cpu_rd_ptr())"]),
CBlock(["DGAccumulator_sum = *(DGAccumulator_sumval.cpu_rd_ptr())"]),
CBlock(["DGAccumulator_sum_residual = *(DGAccumulator_sum_residualval.cpu_rd_ptr())"]),
CBlock(["max_residual = *(max_residualval.cpu_rd_ptr())"]),
CBlock(["max_value = *(max_valueval.cpu_rd_ptr())"]),
CBlock(["min_residual = *(min_residualval.cpu_rd_ptr())"]),
CBlock(["min_value = *(min_valueval.cpu_rd_ptr())"]),
], host = True),
Kernel("PageRankSanity_allNodes_cuda", [('uint64_t &', 'DGAccumulator_residual_over_tolerance'), ('float &', 'DGAccumulator_sum'), ('float &', 'DGAccumulator_sum_residual'), ('float &', 'max_residual'), ('float &', 'max_value'), ('float &', 'min_residual'), ('float &', 'min_value'), ('float', 'local_tolerance'), ('struct CUDA_Context* ', 'ctx')],
[
CBlock(["PageRankSanity_cuda(0, ctx->gg.nnodes, DGAccumulator_residual_over_tolerance, DGAccumulator_sum, DGAccumulator_sum_residual, max_residual, max_value, min_residual, min_value, local_tolerance, ctx)"]),
], host = True),
Kernel("PageRankSanity_masterNodes_cuda", [('uint64_t &', 'DGAccumulator_residual_over_tolerance'), ('float &', 'DGAccumulator_sum'), ('float &', 'DGAccumulator_sum_residual'), ('float &', 'max_residual'), ('float &', 'max_value'), ('float &', 'min_residual'), ('float &', 'min_value'), ('float', 'local_tolerance'), ('struct CUDA_Context* ', 'ctx')],
[
CBlock(["PageRankSanity_cuda(ctx->beginMaster, ctx->beginMaster + ctx->numOwned, DGAccumulator_residual_over_tolerance, DGAccumulator_sum, DGAccumulator_sum_residual, max_residual, max_value, min_residual, min_value, local_tolerance, ctx)"]),
], host = True),
Kernel("PageRankSanity_nodesWithEdges_cuda", [('uint64_t &', 'DGAccumulator_residual_over_tolerance'), ('float &', 'DGAccumulator_sum'), ('float &', 'DGAccumulator_sum_residual'), ('float &', 'max_residual'), ('float &', 'max_value'), ('float &', 'min_residual'), ('float &', 'min_value'), ('float', 'local_tolerance'), ('struct CUDA_Context* ', 'ctx')],
[
CBlock(["PageRankSanity_cuda(0, ctx->numNodesWithEdges, DGAccumulator_residual_over_tolerance, DGAccumulator_sum, DGAccumulator_sum_residual, max_residual, max_value, min_residual, min_value, local_tolerance, ctx)"]),
], host = True),
])
